import os
import pandas as pd

class CsvUtil:
    """Merge and split CSV files between an origin folder and a result folder.

    Attributes:
        origin_folder_path: Folder the input CSV files are read from.
        result_folder_path: Folder the generated CSV files are written to.
        csv_files: Sorted names of the entries in the origin folder that end
            with ``.csv``, captured once at construction time.
    """

    def __init__(self, origin_folder_path, result_folder_path):
        """Record both folders and list the CSV files in the origin folder.

        Args:
            origin_folder_path: Existing folder containing the input files.
            result_folder_path: Folder for output files; it is created on
                demand the first time a method writes to it.
        """
        self.origin_folder_path = origin_folder_path
        self.result_folder_path = result_folder_path
        # os.listdir returns entries in arbitrary order; sorting makes the
        # row order of the merged file reproducible across runs/platforms.
        self.csv_files = sorted(
            file for file in os.listdir(origin_folder_path) if file.endswith('.csv')
        )

    def _ensure_result_folder(self):
        """Create the result folder (and parents) if it does not exist yet."""
        # An empty path means "current directory", which needs no creation
        # (and os.makedirs('') would raise).
        if self.result_folder_path:
            os.makedirs(self.result_folder_path, exist_ok=True)

    def merge_csv(self, merged_file_name):
        """Concatenate every CSV file of the origin folder into one file.

        Rows are appended in the order of ``self.csv_files`` and the index is
        renumbered from zero.

        Args:
            merged_file_name: Output file name without the ``.csv`` suffix.

        Returns:
            Path of the written file inside the result folder.

        Raises:
            FileNotFoundError: If the origin folder contained no CSV files.
        """
        if not self.csv_files:
            raise FileNotFoundError(
                f"No .csv files found in {self.origin_folder_path!r}"
            )
        frames = [
            pd.read_csv(os.path.join(self.origin_folder_path, file))
            for file in self.csv_files
        ]
        # One concat over all frames avoids re-copying the accumulated data
        # on every iteration.
        merged_data = pd.concat(frames, ignore_index=True)
        self._ensure_result_folder()
        output_file = os.path.join(self.result_folder_path, merged_file_name + ".csv")
        merged_data.to_csv(output_file, index=False)
        return output_file

    def split_csv(self, file_name, split_size):
        """Split one CSV file of the origin folder into fixed-size chunks.

        Each chunk holds at most ``split_size`` data rows (the last one may be
        shorter) and is written to the result folder as ``split_<i>.csv``,
        with ``i`` starting at 0.

        Args:
            file_name: Name of the CSV file inside the origin folder.
            split_size: Maximum number of data rows per output file.

        Returns:
            Number of files written (0 when the input has no data rows).

        Raises:
            ValueError: If ``split_size`` is not positive.
        """
        if split_size <= 0:
            raise ValueError(f"split_size must be positive, got {split_size!r}")
        file_path = os.path.join(self.origin_folder_path, file_name)
        data = pd.read_csv(file_path)
        # One start offset per chunk; the length of this range is the ceiling
        # of len(data) / split_size.
        chunk_starts = range(0, len(data), split_size)
        self._ensure_result_folder()
        for i, start in enumerate(chunk_starts):
            # iloc clamps the end of the slice, so the last chunk needs no
            # special handling.
            split_data = data.iloc[start:start + split_size]
            output_file = os.path.join(self.result_folder_path, f"split_{i}.csv")
            split_data.to_csv(output_file, index=False)
        return len(chunk_starts)

if __name__ == '__main__':
    # Demo: merge every CSV found in the source folder into a single file.
    # Both folders are resolved relative to the current working directory;
    # point them at your own data before running.
    source_dir = r'../DataSet/origin'
    target_dir = r'../DataSet/result'
    util = CsvUtil(origin_folder_path=source_dir, result_folder_path=target_dir)
    merged_file = util.merge_csv("merged")
    print(f"合并后的文件保存在：{merged_file}")


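    # Example: split a CSV from the origin folder into files of at most
    # `split_size` rows each (written to the result folder as split_<i>.csv).
    # origin_folder_path = r'DataSet/origin'  # replace with your source folder path
    # result_folder_path = r'DataSet/result'  # replace with your output folder path
    # csv_util = CsvUtil(origin_folder_path, result_folder_path)
    # file_name = "merged.csv"
    # split_size = 1000
    # split_num = csv_util.split_csv(file_name, split_size)
    # print(f"拆分后的文件保存在：{result_folder_path}，共{split_num}个文件")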

